In [1]:
import plotly.express as px
import pandas as pd
In [2]:
# Alternative measurement files that were selected here before (swap the path
# below to analyse one of them instead):
#   era5-pds/measurements-m1.parquet
#   era5-pds/measurements-i10k.parquet
#   era5-pds/measurements-ryzen3.parquet
#   era5-pds/measurements-i13k.parquet
#   era5-pds/measurements-i13k-always-split.parquet
df = (
    pd.read_parquet("era5-pds/measurements-i13k-never-split.parquet")
    .query("clevel > 0")  # drop the results recorded without compression
)
In [3]:
# Explicit ordering of the categorical values, passed to the figures as
# `category_orders`.
category_orders = dict(
    dset=["flux", "wind", "pressure", "precip", "snow"],
    filter=["nofilter", "shuffle", "bitshuffle", "bytedelta"],
)

# Human-readable names for the dataframe columns, passed to the figures as
# `labels`.
labels = {
    # measured quantities
    "cratio": "Compression ratio (x times)",
    "cspeed": "Compression speed (GB/s)",
    "dspeed": "Decompression speed (GB/s)",
    # categorical columns
    "codec": "Codec",
    "dset": "Dataset",
    "filter": "Filter",
    # derived product columns
    "cratio * cspeed": "Compression ratio x Compression speed",
    "cratio * dspeed": "Compression ratio x Decompression speed",
    "cratio * cspeed * dspeed": "Compression ratio x Compression x Decompression speeds",
}
In [4]:
# Box plot of the compression ratio with one box per filter; every single
# measurement is also drawn as a point (points="all").
hover_data = {"filter": False, "codec": True, "cratio": ':.1f', "cspeed": ':.2f',
              "dspeed": ':.2f', "dset": True, "clevel": True}
fig = px.box(df, x="cratio", color="filter", points="all", hover_data=hover_data,
             labels=labels, range_x=(0, 60), range_y=(-.4, .35),
             # Pin the filter order like the figures that already pass
             # category_orders, instead of relying on the order in which the
             # filters first appear in df.
             category_orders=category_orders)
fig.update_layout(
    title={
        'text': "Compression ratio vs filter (larger is better)",
        'x': 0.25,
        'xanchor': 'left',
    },
)
fig.show()
In [5]:
# Strip plot of every compression-ratio measurement, one column per dataset,
# coloured by filter.
hover_data = {
    "filter": False,
    "codec": True,
    "cratio": ':.1f',
    "cspeed": ':.2f',
    "dspeed": ':.2f',
    "dset": False,
    "clevel": True,
}
fig = px.strip(
    df,
    x="dset",
    y="cratio",
    color="filter",
    labels=labels,
    hover_data=hover_data,
    category_orders=category_orders,
)
fig.show()
In [6]:
# Strip plot of every compression-ratio measurement, one column per codec,
# coloured by filter.
hover_data = {"filter": False, "codec": False, "cratio": ':.1f', "cspeed": ':.2f', "dspeed": ':.2f',
              "dset": True, "clevel": True}
# category_orders pins the filter order like the other figures that pass it,
# instead of relying on the order in which the filters first appear in df.
fig = px.strip(df, y="cratio", x="codec", color="filter", labels=labels, hover_data=hover_data,
               category_orders=category_orders)
fig.show()
In [7]:
# Derived figures of merit: products of the compression ratio with the
# compression and/or decompression speed, stored as extra columns of df.
df["cratio * cspeed"] = df["cratio"].mul(df["cspeed"])
df["cratio * dspeed"] = df["cratio"].mul(df["dspeed"])
df["cratio * cspeed * dspeed"] = df["cratio"].mul(df["cspeed"]).mul(df["dspeed"])

# Mean of every numeric column per (filter, clevel, codec) combination; the
# group keys are turned back into ordinary columns.
df_mean = (
    df.groupby(['filter', 'clevel', 'codec'])
    .mean(numeric_only=True)
    .reset_index()
)

# Same aggregation, grouped per (filter, dset) instead.
df_mean2 = (
    df.groupby(['filter', 'dset'])
    .mean(numeric_only=True)
    .reset_index()
)

df_mean
Out[7]:
filter clevel codec cspeed dspeed cratio cratio * cspeed cratio * dspeed cratio * cspeed * dspeed
0 bitshuffle 1 BLOSCLZ 7.223588 54.313857 8.390778 83.393606 540.985075 5528.466227
1 bitshuffle 1 LZ4 8.848198 66.887913 11.659186 116.149040 844.633970 8513.596217
2 bitshuffle 1 LZ4HC 5.053311 66.329630 12.829870 91.101699 928.633857 6770.801956
3 bitshuffle 1 ZLIB 6.167156 23.843989 10.984466 83.765315 295.813701 2309.102172
4 bitshuffle 1 ZSTD 9.724077 43.090349 16.577713 242.189224 808.116685 12248.983854
... ... ... ... ... ... ... ... ... ...
75 shuffle 9 BLOSCLZ 8.332849 64.524961 10.812125 118.651751 908.799349 10583.260343
76 shuffle 9 LZ4 7.950522 85.191965 10.846365 115.143071 1134.290091 12583.507254
77 shuffle 9 LZ4HC 1.485687 91.912129 13.622630 35.753923 1490.752072 4292.904092
78 shuffle 9 ZLIB 0.747198 11.012489 17.034856 19.769907 185.638944 196.426216
79 shuffle 9 ZSTD 0.198090 41.813120 19.582059 4.579108 1148.469775 281.720531

80 rows × 9 columns

In [8]:
# Mean compression ratio per codec as grouped bars (one bar per filter),
# with one facet column per compression level.
fig = px.bar(
    df_mean,
    x="codec",
    y="cratio",
    color="filter",
    facet_col="clevel",
    barmode="group",
    title="Compression ratio (mean)",
    labels=labels,
    category_orders=category_orders,
)
fig.show()
In [9]:
# Mean compression speed per codec as grouped bars (one bar per filter),
# with one facet column per compression level.
fig = px.bar(
    df_mean,
    x="codec",
    y="cspeed",
    color="filter",
    facet_col="clevel",
    barmode="group",
    title="Compression speed (mean)",
    labels=labels,
    category_orders=category_orders,
)
fig.show()
In [10]:
# Mean compression speed per filter on a logarithmic y axis, with one facet
# column per dataset.
fig = px.bar(
    df_mean2,
    x="filter",
    y="cspeed",
    color="filter",
    facet_col="dset",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [11]:
# Strip plot of every compression-speed measurement, one column per codec,
# coloured by filter. hover_data is not set in this cell: it is the dict
# assigned by the closest preceding cell that defines it.
# category_orders pins the filter order like the other figures that pass it,
# instead of relying on the order in which the filters first appear in df.
fig = px.strip(df, y="cspeed", x="codec", color="filter", hover_data=hover_data, labels=labels,
               category_orders=category_orders)
fig.show()
In [12]:
# Mean decompression speed per codec as grouped bars (one bar per filter),
# with one facet column per compression level.
fig = px.bar(
    df_mean,
    x="codec",
    y="dspeed",
    color="filter",
    facet_col="clevel",
    barmode="group",
    title="Decompression speed (mean)",
    labels=labels,
    category_orders=category_orders,
)
fig.show()
In [13]:
# Mean decompression speed per filter on a logarithmic y axis, with one facet
# column per dataset.
fig = px.bar(
    df_mean2,
    x="filter",
    y="dspeed",
    color="filter",
    facet_col="dset",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [14]:
# Strip plot of every decompression-speed measurement, one column per codec,
# coloured by filter. hover_data is not set in this cell: it is the dict
# assigned by the closest preceding cell that defines it.
# category_orders pins the filter order like the other figures that pass it,
# instead of relying on the order in which the filters first appear in df.
fig = px.strip(df, y="dspeed", x="codec", color="filter", hover_data=hover_data, labels=labels,
               category_orders=category_orders)
fig.show()
In [15]:
# Scatter of compression ratio (logarithmic y axis) against compression
# speed, one point per measurement, coloured by filter.
hover_data = {"filter": True, "codec": True, "cratio": ':.1f', "cspeed": ':.2f',
              "dspeed": ':.2f', "dset": True, "clevel": True}
# category_orders pins the filter order like the other figures that pass it,
# instead of relying on the order in which the filters first appear in df.
fig = px.scatter(df, y="cratio", x="cspeed", color="filter", log_y=True,
                 hover_data=hover_data, labels=labels,
                 category_orders=category_orders)
fig.show()
In [16]:
# Box plot of "cratio * cspeed" per codec on a logarithmic y axis, one box
# per filter. hover_data is the dict assigned by the closest preceding cell
# that defines it.
# category_orders is passed like in the sibling box plots of
# "cratio * dspeed" and "cratio * cspeed * dspeed", so the filter order does
# not depend on the order in which the filters first appear in df.
fig = px.box(df, y="cratio * cspeed", x="codec", color="filter", log_y=True,
             hover_data=hover_data, labels=labels, category_orders=category_orders)
fig.show()
In [17]:
# Mean of "cratio * cspeed" per codec as grouped bars (one bar per filter) on
# a logarithmic y axis, with one facet column per compression level.
fig = px.bar(
    df_mean,
    x="codec",
    y="cratio * cspeed",
    color="filter",
    facet_col="clevel",
    barmode="group",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [18]:
# Mean of "cratio * cspeed" per filter on a logarithmic y axis, with one
# facet column per dataset.
fig = px.bar(
    df_mean2,
    x="filter",
    y="cratio * cspeed",
    color="filter",
    facet_col="dset",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [19]:
# Scatter of compression ratio (logarithmic y axis) against decompression
# speed, one point per measurement, coloured by filter.
hover_data = {"filter": True, "codec": True, "cratio": ':.1f', "cspeed": ':.2f',
              "dspeed": ':.2f', "dset": True, "clevel": True}
# category_orders pins the filter order like the other figures that pass it,
# instead of relying on the order in which the filters first appear in df.
fig = px.scatter(df, y="cratio", x="dspeed", color="filter", log_y=True,
                 hover_data=hover_data, labels=labels,
                 category_orders=category_orders)
fig.show()
In [20]:
# Box plot of "cratio * dspeed" per codec on a logarithmic y axis, one box
# per filter. hover_data is the dict assigned by the closest preceding cell
# that defines it.
fig = px.box(
    df,
    x="codec",
    y="cratio * dspeed",
    color="filter",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
    hover_data=hover_data,
)
fig.show()
In [21]:
# Mean of "cratio * dspeed" per codec as grouped bars (one bar per filter) on
# a logarithmic y axis, with one facet column per compression level.
fig = px.bar(
    df_mean,
    x="codec",
    y="cratio * dspeed",
    color="filter",
    facet_col="clevel",
    barmode="group",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [22]:
# Mean of "cratio * dspeed" per filter on a logarithmic y axis, with one
# facet column per dataset.
fig = px.bar(
    df_mean2,
    x="filter",
    y="cratio * dspeed",
    color="filter",
    facet_col="dset",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [23]:
# Box plot of "cratio * cspeed * dspeed" per codec on a logarithmic y axis,
# one box per filter. hover_data is the dict assigned by the closest
# preceding cell that defines it.
fig = px.box(
    df,
    x="codec",
    y="cratio * cspeed * dspeed",
    color="filter",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
    hover_data=hover_data,
)
fig.show()
In [24]:
# Mean of "cratio * cspeed * dspeed" per codec as grouped bars (one bar per
# filter) on a logarithmic y axis, with one facet column per compression level.
fig = px.bar(
    df_mean,
    x="codec",
    y="cratio * cspeed * dspeed",
    color="filter",
    facet_col="clevel",
    barmode="group",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [25]:
# Mean of "cratio * cspeed * dspeed" per filter on a logarithmic y axis, with
# one facet column per dataset.
fig = px.bar(
    df_mean2,
    x="filter",
    y="cratio * cspeed * dspeed",
    color="filter",
    facet_col="dset",
    log_y=True,
    category_orders=category_orders,
    labels=labels,
)
fig.show()
In [ ]: